{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The autoreload extension is already loaded. To reload it, use:\n",
      "  %reload_ext autoreload\n"
     ]
    }
   ],
   "source": [
    "# Enable IPython's autoreload extension in mode 2 so that imported modules\n",
    "# are reloaded before code runs and edits to them take effect without a restart.\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "import sktime\n",
    "from sktime.utils.data_io import load_from_tsfile_to_dataframe\n",
    "import data_loader as data\n",
    "\n",
    "# Directory containing the BIDMC32 .ts files; the exported .npy arrays are\n",
    "# written to a sub-directory of it named after DATASET.\n",
    "DATA_PATH = 'data/'\n",
    "# Dataset tag: part of the .ts file names (BIDMC32<DATASET>_TRAIN/_TEST.ts)\n",
    "# and the name of the output sub-directory.\n",
    "DATASET = 'RR'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "5492it [00:46, 118.38it/s]\n",
      "2420it [00:20, 120.81it/s]\n"
     ]
    }
   ],
   "source": [
    "# Read the predefined TRAIN and TEST files with the project-local loader.\n",
    "# The directory and file name are passed to os.path.join as separate\n",
    "# components so the separator is inserted exactly once.\n",
    "X_train_orig, y_train_orig = data.load_from_tsfile_to_dataframe(\n",
    "    os.path.join(DATA_PATH, f'BIDMC32{DATASET}_TRAIN.ts'),\n",
    "    replace_missing_vals_with='NaN'\n",
    ")\n",
    "\n",
    "X_test_orig, y_test_orig = data.load_from_tsfile_to_dataframe(\n",
    "    os.path.join(DATA_PATH, f'BIDMC32{DATASET}_TEST.ts'),\n",
    "    replace_missing_vals_with='NaN'\n",
    ")\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(5471, 2) <class 'pandas.core.frame.DataFrame'>\n",
      "(5471,) <class 'numpy.ndarray'>\n",
      "(2399, 2) <class 'pandas.core.frame.DataFrame'>\n",
      "(2399,) <class 'numpy.ndarray'>\n"
     ]
    }
   ],
   "source": [
    "# Report the shape and container type of each loaded features/targets object.\n",
    "for loaded in (X_train_orig, y_train_orig, X_test_orig, y_test_orig):\n",
    "    print(loaded.shape, type(loaded))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype('float64')"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the dtype of the loaded test targets.\n",
    "y_test_orig.dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "def split_data(X_train_orig, y_train_orig, X_test_orig, y_test_orig, shuffle=True, seed=42,\n",
    "               test_size=0.15, val_size=0.15):\n",
    "    \"\"\"Build train/validation/test partitions from the original train and test sets.\n",
    "\n",
    "    With ``shuffle=True`` both original sets are pooled and re-split at random:\n",
    "    ``test_size`` of the pool becomes the test set, then ``val_size`` of the\n",
    "    remainder becomes the validation set. With ``shuffle=False`` the original\n",
    "    training set is returned unchanged and the original test set is cut in half\n",
    "    by position (first half validation, second half test); ``seed``,\n",
    "    ``test_size`` and ``val_size`` are not used in that case.\n",
    "\n",
    "    Args:\n",
    "        X_train_orig: pandas DataFrame with the original training features.\n",
    "        y_train_orig: 1-D array-like with the original training targets.\n",
    "        X_test_orig: pandas DataFrame with the original test features.\n",
    "        y_test_orig: 1-D array-like with the original test targets.\n",
    "        shuffle: pool and randomly re-split when True, keep the original\n",
    "            partition when False.\n",
    "        seed: random state passed to both random splits.\n",
    "        test_size: share of the pooled data held out for testing.\n",
    "        val_size: share of the remaining data held out for validation.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)``.\n",
    "    \"\"\"\n",
    "    if shuffle:\n",
    "        X_all = pd.concat((X_train_orig, X_test_orig))\n",
    "        y_all = np.concatenate((y_train_orig, y_test_orig))\n",
    "        X_train, X_test, y_train, y_test = train_test_split(\n",
    "            X_all, y_all, test_size=test_size, random_state=seed)\n",
    "        X_train, X_val, y_train, y_val = train_test_split(\n",
    "            X_train, y_train, test_size=val_size, random_state=seed)\n",
    "    else:\n",
    "        X_train, y_train = X_train_orig, y_train_orig\n",
    "        m = X_test_orig.shape[0] // 2\n",
    "        X_val, X_test = X_test_orig.iloc[:m, :], X_test_orig.iloc[m:, :]\n",
    "        # The targets are one-dimensional arrays: they have no .iloc accessor and\n",
    "        # only a single axis, so they are sliced by position instead.\n",
    "        y_holdout = np.asarray(y_test_orig)\n",
    "        y_val, y_test = y_holdout[:m], y_holdout[m:]\n",
    "    return X_train, y_train, X_val, y_val, X_test, y_test\n",
    "\n",
    "# Pool the original sets and draw random train/validation/test partitions.\n",
    "X_train, y_train, X_val, y_val, X_test, y_test = split_data(\n",
    "    X_train_orig=X_train_orig,\n",
    "    y_train_orig=y_train_orig,\n",
    "    X_test_orig=X_test_orig,\n",
    "    y_test_orig=y_test_orig,\n",
    "    shuffle=True,\n",
    ")\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _to_numpy(X):\n",
    "    \"\"\"Convert a DataFrame whose cells hold sequences into one dense array.\n",
    "\n",
    "    The cells of each row are stacked into a 2-D array, the per-row arrays are\n",
    "    stacked along a new leading axis, and the last two axes are then swapped.\n",
    "    \"\"\"\n",
    "    per_row = []\n",
    "    for row in X.to_numpy():\n",
    "        per_row.append(np.stack(row))\n",
    "    return np.swapaxes(np.stack(per_row), -1, -2)\n",
    "\n",
    "# Create the output directory up front so np.save does not fail when it is missing.\n",
    "out_dir = os.path.join(DATA_PATH, DATASET)\n",
    "os.makedirs(out_dir, exist_ok=True)\n",
    "\n",
    "# Every split goes through the same conversion with no offset applied, so the\n",
    "# train, validation and test features stay on the same scale.\n",
    "for split_name, X_split, y_split in (\n",
    "    (\"train\", X_train, y_train),\n",
    "    (\"valid\", X_val, y_val),\n",
    "    (\"test\", X_test, y_test),\n",
    "):\n",
    "    np.save(os.path.join(out_dir, f\"{split_name}x.npy\"), _to_numpy(X_split))\n",
    "    np.save(os.path.join(out_dir, f\"{split_name}y.npy\"), y_split)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(5685, 4000, 2)\n",
      "float64\n",
      "(5685,)\n",
      "float64\n",
      "(1004, 4000, 2)\n",
      "float64\n",
      "(1004,)\n",
      "float64\n",
      "(1181, 4000, 2)\n",
      "float64\n",
      "(1181,)\n",
      "float64\n"
     ]
    }
   ],
   "source": [
    "# Reload every exported array and print its shape followed by its dtype.\n",
    "for file in ['trainx', 'trainy', 'validx', 'validy', 'testx', 'testy']:\n",
    "    saved = np.load(f\"{DATA_PATH}/{DATASET}/{file}.npy\")\n",
    "    for summary in (saved.shape, saved.dtype):\n",
    "        print(summary)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
